Changeset: 8a73b460fa18 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=8a73b460fa18 Modified Files: gdk/gdk_arrays.c gdk/gdk_arrays.h monetdb5/modules/kernel/arrays.c Branch: arrays Log Message:
changing code to use new structure + leftfetch join takes dimCands and oidCands and should handle them itself diffs (truncated from 319 to 300 lines): diff --git a/gdk/gdk_arrays.c b/gdk/gdk_arrays.c --- a/gdk/gdk_arrays.c +++ b/gdk/gdk_arrays.c @@ -222,6 +222,36 @@ BAT* materialise_nonDimensional_column(i return b; } +BAT *projectCells(gdk_array* dims, BAT* oidsBAT) { + BAT *resBAT = NULL; + BUN resSize = 1; + oid *resOIDs = NULL; + dim_node *n; + + /*combine the oidsDimensions in order to get the global oids (the cells)*/ + for(n=dims->h; n; n=n->next) { + BUN sz = n->data->elementsNum; + if(sz > 0) + resSize *= sz; + else + resSize *= n->data->initialElementsNum; + } + resSize += BATcount(oidsBAT); //this is not accurate but I believe it is ok + //fprintf(stderr, "estiamted size = %u\n", (unsigned int)resSize); + /*the size of the result is the same as the number of cells in the candidatesDimensions */ + if(!(resBAT = BATnew(TYPE_void, TYPE_oid, resSize, TRANSIENT))) + return NULL; + resOIDs = (oid*)Tloc(resBAT, BUNfirst(resBAT)); + resSize = qualifyingOIDs(0, 1, dims, oidsBAT, &resOIDs); + //fprintf(stderr, "real size = %u\n", (unsigned int)resSize); + BATsetcount(resBAT, resSize); + BATseqbase(resBAT, 0); + BATderiveProps(resBAT, FALSE); + + return resBAT; +} + + #if 0 gdk_cells* cells_new(void) { gdk_cells *cells = GDKmalloc(sizeof(gdk_cells)); @@ -1930,35 +1960,6 @@ static BUN qualifyingOIDs(int dimNum, in return sz; } -BAT *projectCells(gdk_cells* dims, BAT* oidsBAT) { - BAT *resBAT = NULL; - BUN resSize = 1; - oid *resOIDs = NULL; - dim_node *n; - - /*combine the oidsDimensions in order to get the global oids (the cells)*/ - for(n=dims->h; n; n=n->next) { - BUN sz = n->data->elementsNum; - if(sz > 0) - resSize *= sz; - else - resSize *= n->data->initialElementsNum; - } - resSize += BATcount(oidsBAT); //this is not accurate but I believe it is ok - //fprintf(stderr, "estiamted size = %u\n", (unsigned int)resSize); - /*the size of the result is the same as 
the number of cells in the candidatesDimensions */ - if(!(resBAT = BATnew(TYPE_void, TYPE_oid, resSize, TRANSIENT))) - return NULL; - resOIDs = (oid*)Tloc(resBAT, BUNfirst(resBAT)); - resSize = qualifyingOIDs(0, 1, dims, oidsBAT, &resOIDs); - //fprintf(stderr, "real size = %u\n", (unsigned int)resSize); - BATsetcount(resBAT, resSize); - BATseqbase(resBAT, 0); - BATderiveProps(resBAT, FALSE); - - return resBAT; -} - /*takes a set of dimensions of any type and returns the corresponding dimensions in indices format */ gdk_cells* arrayToCells(gdk_array *array) { diff --git a/gdk/gdk_arrays.h b/gdk/gdk_arrays.h --- a/gdk/gdk_arrays.h +++ b/gdk/gdk_arrays.h @@ -322,8 +322,8 @@ gdk_return dimensionBATsubjoin(BAT **out #endif /*NEW*/ +gdk_export BAT *projectCells(gdk_array* dims, BAT* oidsBAT); #if 0 -gdk_export BAT *projectCells(gdk_cells* dims, BAT* oidsBAT); gdk_export gdk_cells* arrayToCells(gdk_array *array); gdk_export gdk_array *cellsToArray(gdk_cells *cells); #endif diff --git a/monetdb5/modules/kernel/arrays.c b/monetdb5/modules/kernel/arrays.c --- a/monetdb5/modules/kernel/arrays.c +++ b/monetdb5/modules/kernel/arrays.c @@ -17,25 +17,27 @@ static int arrayCellsNum(gdk_array *arra return jumpSize(array, array->dimsNum); } -static BUN oidToIdx(oid oidVal, int dimNum, int currentDimNum, BUN skipCells, gdk_array *dims) { +/*UPDATED*/ +static BUN oidToIdx(oid oidVal, int dimNum, int currentDimNum, BUN skipCells, gdk_array *array) { BUN oid = 0; while(currentDimNum < dimNum) { - skipCells*=dims->dimSizes[currentDimNum]; + skipCells*=array->dims[currentDimNum]->elsNum; currentDimNum++; } - if(currentDimNum == dims->dimsNum-1) + if(currentDimNum == array->dimsNum-1) oid = oidVal; else - oid = oidToIdx(oidVal, dimNum, currentDimNum+1, skipCells*dims->dimSizes[currentDimNum], dims); + oid = oidToIdx(oidVal, dimNum, currentDimNum+1, skipCells*array->dims[currentDimNum]->elsNum, array); if(currentDimNum == dimNum) //in the last one we do not compute module return 
oid/skipCells; return oid%skipCells; } -static BUN* oidToIdx_bulk(oid* oidVals, int valsNum, int dimNum, int currentDimNum, BUN skipCells, gdk_array *dims) { +/*UPDATED*/ +static BUN* oidToIdx_bulk(oid* oidVals, int valsNum, int dimNum, int currentDimNum, BUN skipCells, gdk_array *array) { BUN *oids = GDKmalloc(valsNum*sizeof(BUN)); int i; @@ -45,11 +47,11 @@ static BUN* oidToIdx_bulk(oid* oidVals, } while(currentDimNum < dimNum) { - skipCells*=dims->dimSizes[currentDimNum]; + skipCells*=array->dims[currentDimNum]->elsNum; currentDimNum++; } - if(currentDimNum == dims->dimsNum-1) { //last dimension, do not go any deeper + if(currentDimNum == array->dimsNum-1) { //last dimension, do not go any deeper if(currentDimNum == dimNum) {//in the dimension of interest we do not compute the module for(i=0; i<valsNum; i++) oids[i] = oidVals[i]/skipCells; @@ -59,7 +61,7 @@ static BUN* oidToIdx_bulk(oid* oidVals, } } else { - BUN *oidRes = oidToIdx_bulk(oidVals, valsNum, dimNum, currentDimNum+1, skipCells*dims->dimSizes[currentDimNum], dims); + BUN *oidRes = oidToIdx_bulk(oidVals, valsNum, dimNum, currentDimNum+1, skipCells*array->dims[currentDimNum]->elsNum, array); if(currentDimNum == dimNum) {//in the dimension of interest we do not compute the module for(i=0; i<valsNum; i++) @@ -353,7 +355,7 @@ static bool updateCandidateResults(gdk_a return 1; } -str ALGdimensionSubselect2(ptr *dimsRes, bat* oidsRes, const ptr *dim, const ptr* dims, const ptr *dimsCand, const bat* oidsCand, +str ALGdimensionSubselect2(ptr *dimsRes, bat* oidsRes, const ptr *dim, const ptr* dims, const ptr *dimCands, const bat* oidCands, const void *low, const void *high, const bit *li, const bit *hi, const bit *anti) { gdk_array *array = (gdk_array*)*dims; gdk_analytic_dimension *dimension = (gdk_analytic_dimension*)*dim; @@ -364,7 +366,7 @@ str ALGdimensionSubselect2(ptr *dimsRes, int type; const void* nil; - readCands(&dimCands_in, &candidatesBAT_in, dimsCand, oidsCand, array); + readCands(&dimCands_in, 
&candidatesBAT_in, dimCands, oidCands, array); if(!dimCands_in) { //empty results if(candidatesBAT_in) @@ -512,7 +514,7 @@ str ALGdimensionSubselect2(ptr *dimsRes, return emptyCandidateResults(dimsRes, oidsRes); } - if(oidsCand && candidatesBAT_in != candidatesBAT_out) //there was a candidatesBAT in the input that is not sent in the output + if(oidCands && candidatesBAT_in != candidatesBAT_out) //there was a candidatesBAT in the input that is not sent in the output BBPunfix(candidatesBAT_in->batCacheid); BBPkeepref(*oidsRes = candidatesBAT_out->batCacheid); @@ -526,7 +528,7 @@ str ALGdimensionSubselect1(ptr *dimsRes, return ALGdimensionSubselect2(dimsRes, oidsRes, dim, dims, NULL, NULL, low, high, li, hi, anti); } -str ALGdimensionThetasubselect2(ptr *dimsRes, bat* oidsRes, const ptr *dim, const ptr* dims, const ptr *dimsCand, const bat* oidsCand, const void *val, const char **opp) { +str ALGdimensionThetasubselect2(ptr *dimsRes, bat* oidsRes, const ptr *dim, const ptr* dims, const ptr *dimCands, const bat* oidCands, const void *val, const char **opp) { bit li = 0; bit hi = 0; bit anti = 0; @@ -538,42 +540,42 @@ str ALGdimensionThetasubselect2(ptr *dim /* "=" or "==" */ li = hi = 1; anti = 0; - return ALGdimensionSubselect2(dimsRes, oidsRes, dim, dims, dimsCand, oidsCand, val, nil, &li, &hi, &anti); + return ALGdimensionSubselect2(dimsRes, oidsRes, dim, dims, dimCands, oidCands, val, nil, &li, &hi, &anti); } if (op[0] == '!' 
&& op[1] == '=' && op[2] == 0) { /* "!=" (equivalent to "<>") */ li = hi = anti = 1; - return ALGdimensionSubselect2(dimsRes, oidsRes, dim, dims, dimsCand, oidsCand, val, nil, &li, &hi, &anti); + return ALGdimensionSubselect2(dimsRes, oidsRes, dim, dims, dimCands, oidCands, val, nil, &li, &hi, &anti); } if (op[0] == '<') { if (op[1] == 0) { /* "<" */ li = hi = anti = 0; - return ALGdimensionSubselect2(dimsRes, oidsRes, dim, dims, dimsCand, oidsCand, nil, val, &li, &hi, &anti); + return ALGdimensionSubselect2(dimsRes, oidsRes, dim, dims, dimCands, oidCands, nil, val, &li, &hi, &anti); } if (op[1] == '=' && op[2] == 0) { /* "<=" */ li = anti = 0; hi = 1; - return ALGdimensionSubselect2(dimsRes, oidsRes, dim, dims, dimsCand, oidsCand, nil, val, &li, &hi, &anti); + return ALGdimensionSubselect2(dimsRes, oidsRes, dim, dims, dimCands, oidCands, nil, val, &li, &hi, &anti); } if (op[1] == '>' && op[2] == 0) { /* "<>" (equivalent to "!=") */ li = hi = anti = 1; - return ALGdimensionSubselect2(dimsRes, oidsRes, dim, dims, dimsCand, oidsCand, val, nil, &li, &hi, &anti); + return ALGdimensionSubselect2(dimsRes, oidsRes, dim, dims, dimCands, oidCands, val, nil, &li, &hi, &anti); } } if (op[0] == '>') { if (op[1] == 0) { /* ">" */ li = hi = anti = 0; - return ALGdimensionSubselect2(dimsRes, oidsRes, dim, dims, dimsCand, oidsCand, val, nil, &li, &hi, &anti); + return ALGdimensionSubselect2(dimsRes, oidsRes, dim, dims, dimCands, oidCands, val, nil, &li, &hi, &anti); } if (op[1] == '=' && op[2] == 0) { /* ">=" */ li = 1; hi = anti = 0; - return ALGdimensionSubselect2(dimsRes, oidsRes, dim, dims, dimsCand, oidsCand, val, nil, &li, &hi, &anti); + return ALGdimensionSubselect2(dimsRes, oidsRes, dim, dims, dimCands, oidCands, val, nil, &li, &hi, &anti); } } @@ -584,11 +586,11 @@ str ALGdimensionThetasubselect1(ptr *dim return ALGdimensionThetasubselect2(dimsRes, oidsRes, dim, dims, NULL, NULL, val, op); } -str ALGdimensionLeftfetchjoin1(bat *result, const ptr *dimsCands, const bat 
*batCands, const ptr *dim, const ptr *dims) { +str ALGdimensionLeftfetchjoin1(bat *result, const ptr *dimCands, const bat *oidCands, const ptr *dim, const ptr *dims) { gdk_array *array = (gdk_array*)*dims; gdk_analytic_dimension *dimension = (gdk_analytic_dimension*)*dim; - gdk_cells *candidateDimensions = *dimCands; - BAT *candsBAT = NULL, *resBAT = NULL; + gdk_array *dimCands_in = (gdk_array*)*dimCands; + BAT *oidCandsBAT = NULL, *candsBAT = NULL, *resBAT = NULL; BUN resSize = 0; #define computeValues(TPE) \ @@ -612,10 +614,15 @@ do { \ } while(0) - if ((candsBAT = BATdescriptor(*batCands)) == NULL) { - throw(MAL, "algebra.dimensionLeftfetchjoin", RUNTIME_OBJECT_MISSING); + if ((oidCandsBAT = BATdescriptor(*oidCands)) == NULL) { + throw(MAL, "algebra.leftfetchjoin", RUNTIME_OBJECT_MISSING); } + +//TODO: Find a more clever way to do this without the need to project the cells + //create the oids using the candidates + candsBAT = projectCells(dimCands_in, oidCandsBAT); resSize = BATcount(candsBAT); + /*for each oid in the candsBAT find the real value of the dimension */ switch(dimension->type) { \ case TYPE_bte: \ @@ -656,17 +663,19 @@ do { \ /*should I release space or MonetDB does it for the input?*/ analyticDimensionDelete(dimension); arrayDelete(array); + BBPunfix(candsBAT->batCacheid); + BBPunfix(oidCandsBAT->batCacheid); return MAL_SUCCEED; } -str ALGnonDimensionLeftfetchjoin1(bat* result, const ptr *dimsCands, const bat *batCands, const bat *vals, const ptr *dims) { +str ALGnonDimensionLeftfetchjoin1(bat* result, const ptr *dimCands, const bat *oidCands, const bat *vals, const ptr *dims) { /* projecting a non-dimensional column does not differ from projecting any relational column */ - BAT *candsBAT, *valsBAT, *resBAT= NULL; - + BAT *oidCandsBAT, *candsBAT, *valsBAT, *resBAT= NULL; + gdk_array *dimCands_in = (gdk_array*)*dimCands; (void)*dims; - if ((candsBAT = BATdescriptor(*cands)) == NULL) { + if ((oidCandsBAT = BATdescriptor(*oidCands)) == NULL) { 
throw(MAL, "algebra.leftfetchjoin", RUNTIME_OBJECT_MISSING); } if ((valsBAT = BATdescriptor(*vals)) == NULL) { @@ -674,6 +683,9 @@ str ALGnonDimensionLeftfetchjoin1(bat* r throw(MAL, "algebra.leftfetchjoin", RUNTIME_OBJECT_MISSING); } + //create the oids using the candidates + candsBAT = projectCells(dimCands_in, oidCandsBAT); + resBAT = BATproject(candsBAT, valsBAT); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list