Changeset: 8c5e3968b78a for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=8c5e3968b78a
Modified Files:
	geom/monetdb5/geom.mal
	geom/monetdb5/geomPoints.c
	monetdb5/optimizer/opt_geospatial.c
Branch: geo
Log Message:
geospatial optimiser handles conjunctions of spatial relations diffs (266 lines): diff --git a/geom/monetdb5/geom.mal b/geom/monetdb5/geom.mal --- a/geom/monetdb5/geom.mal +++ b/geom/monetdb5/geom.mal @@ -463,9 +463,9 @@ command Contains2(g:wkb, x:bat[:oid,:dbl command Filter(a:wkb, b:bat[:oid,:wkb]) :bat[:oid,:wkb] address wkbFilter_geom_bat comment "Filters the points in the bats according to the MBR of the other bat."; command Filter(a:bat[:oid,:wkb], b:wkb) :bat[:oid,:wkb] address wkbFilter_bat_geom; -command Filter1(geom:wkb, x:bat[:oid,:dbl], y:bat[:oid,:dbl]) :bat[:oid,:oid] address wkbFilterWithImprints_geom_bat +command Filter(geom:wkb, x:bat[:oid,:dbl], y:bat[:oid,:dbl]) :bat[:oid,:oid] address wkbFilterWithImprints_geom_bat comment "Filters x,y coordinates in the BATs using imprints"; -command Filter(geom:wkb, x:bat[:oid,:dbl], y:bat[:oid,:dbl]) :bat[:oid,:oid] address wkbFilterWithPBSM_geom_bat +command Filter1(geom:wkb, x:bat[:oid,:dbl], y:bat[:oid,:dbl]) :bat[:oid,:oid] address wkbFilterWithPBSM_geom_bat comment "Filters x,y coordinates in the BATs using PBSM"; #command point(x:bat[:oid,:dbl],y:bat[:oid,:dbl]) :bat[:oid,:wkb] diff --git a/geom/monetdb5/geomPoints.c b/geom/monetdb5/geomPoints.c --- a/geom/monetdb5/geomPoints.c +++ b/geom/monetdb5/geomPoints.c @@ -243,6 +243,9 @@ str wkbPointsContains1_geom_bat(bat* out } BBPkeepref(*outBAT_id = outBAT->batCacheid); + +// fprintf(stderr, "Contains1: IN %u - OUT %u\n", (unsigned int)BATcount(xBAT), (unsigned int)BATcount(outBAT)); + goto clean; clean: @@ -305,7 +308,7 @@ static str pnpoly_(int *out, const GEOSG BBPreleaseref(bpy->batCacheid); throw(MAL, "geom.point", "both point bats must have dense and aligned heads"); } - +; /*Create output BAT*/ if ((bo = BATnew(TYPE_void, ATOMindex("bte"), BATcount(bpx), TRANSIENT)) == NULL) { BBPreleaseref(bpx->batCacheid); @@ -369,6 +372,7 @@ static str pnpoly_(int *out, const GEOSG GDKfree(xPoints); GDKfree(yPoints); +//fprintf(stderr, "Contains2: IN %u - OUT 
%u\n", (unsigned int)BATcount(bpx), (unsigned int)BATcount(bo)); return MAL_SUCCEED; } diff --git a/monetdb5/optimizer/opt_geospatial.c b/monetdb5/optimizer/opt_geospatial.c --- a/monetdb5/optimizer/opt_geospatial.c +++ b/monetdb5/optimizer/opt_geospatial.c @@ -1,23 +1,42 @@ #include "monetdb_config.h" #include "opt_geospatial.h" + +typedef struct { + int first; + int second; +} arguments; + +static arguments subselectInputs[5]; +static int spatials; + +static arguments projectInputs[5]; +//static int subselectFirstInput[5]; +//static int subselectSecondInput[5]; +//static int foundItems = 0; +static int j = 0; +//static int subselectsReturnArgument[5]; +//static int projectReturnArgument[5]; +static int subselects =0; + static void createFilterInstruction(MalBlkPtr mb, InstrPtr *oldInstrPtr, int instructionNum, int filterFirstArgument) { - InstrPtr filterInstrPtr, projectXInstrPtr, projectYInstrPtr, projectInstrPtr; - int filterReturnId, subselectReturnId, projectXReturnId, projectYReturnId; + InstrPtr filterInstrPtr, projectXInstrPtr, projectYInstrPtr;//, subselectInstrPtr, projectInstrPtr; + int filterReturnId, projectXReturnId, projectYReturnId;//, subselectReturnId; //create and put in the MAL plan the new instructions filterInstrPtr = newStmt(mb, "batgeom", "Filter"); projectXInstrPtr = newStmt(mb, "algebra", "leftfetchjoin"); projectYInstrPtr = newStmt(mb, "algebra", "leftfetchjoin"); pushInstruction(mb, oldInstrPtr[instructionNum]); - pushInstruction(mb, oldInstrPtr[instructionNum+1]); - projectInstrPtr = newStmt(mb, "algebra", "leftfetchjoin"); +// //pushInstruction(mb, oldInstrPtr[instructionNum+1]); +// subselectInstrPtr = newStmt(mb, "algebra", "subselect"); +// projectInstrPtr = newStmt(mb, "algebra", "leftfetchjoin"); //make new return variables filterReturnId = newTmpVariable(mb, newBatType(TYPE_oid, TYPE_oid)); - projectXReturnId = newVariable(mb, GDKstrdup("xBATfiltered"), getArgType(mb,oldInstrPtr[instructionNum],2)); - projectYReturnId = 
newVariable(mb, GDKstrdup("yBATfiltered"), getArgType(mb,oldInstrPtr[instructionNum],3)); - subselectReturnId = newTmpVariable(mb, newBatType(TYPE_oid, TYPE_oid)); + projectXReturnId = newTmpVariable(mb, getArgType(mb,oldInstrPtr[instructionNum],2)); + projectYReturnId = newTmpVariable(mb, getArgType(mb,oldInstrPtr[instructionNum],3)); +// subselectReturnId = newTmpVariable(mb, newBatType(TYPE_oid, TYPE_oid)); //set the arguments for filter setReturnArgument(filterInstrPtr, filterReturnId); @@ -40,26 +59,101 @@ static void createFilterInstruction(MalB setArgument(mb, oldInstrPtr[instructionNum], 2, projectXReturnId); delArgument(oldInstrPtr[instructionNum], 3); setArgument(mb, oldInstrPtr[instructionNum], 3, projectYReturnId); + + //store the return variable of the spatial function + subselectInputs[spatials].first = getArg(oldInstrPtr[instructionNum],0); + subselectInputs[spatials].second = filterReturnId; +//fprintf(stderr, "%d -> SpatialReturnId: %d, FilterReturnId: %d\n", foundItems, subselectFirstInput[foundItems], subselectSecondInput[foundItems]); + spatials++; - //the new subselect does not use candidates +/* //the new subselect does not use candidates setReturnArgument(projectInstrPtr, getArg(oldInstrPtr[instructionNum+1],0)); //get the variable before changing it setReturnArgument(oldInstrPtr[instructionNum+1], subselectReturnId); if(oldInstrPtr[instructionNum+1]->argc == 8) delArgument(oldInstrPtr[instructionNum+1], 2); - + //add a new function that gets the oids of the original BAT that qualified the spatial function projectInstrPtr = pushArgument(mb, projectInstrPtr, subselectReturnId); projectInstrPtr = pushArgument(mb, projectInstrPtr, filterReturnId); - +*/ } +static void fixSubselect(MalBlkPtr mb, InstrPtr *oldInstrPtr, int instructionNum, int filterReturnId) { + InstrPtr projectInstrPtr; + int subselectReturnId, projectReturnId; + int k=0; + projectInstrPtr = newStmt(mb, "algebra", "leftfetchjoin"); + + + //get the return variable of this 
subselect + projectInputs[subselects].first = getArg(oldInstrPtr[instructionNum],0); + //unless the subselect involves results from other subselect the return variable of + //it will be the return variable of the projection + projectReturnId = projectInputs[subselects].first; + + //create the new subselect command that does not use any candidates + subselectReturnId = newTmpVariable(mb, newBatType(TYPE_oid, TYPE_oid)); + setReturnArgument(oldInstrPtr[instructionNum], subselectReturnId); + if(oldInstrPtr[instructionNum]->argc == 8) { + fprintf(stderr, "secondArg %d\n", getArg(oldInstrPtr[instructionNum], 2)); + + //check if the second argument to this subselect is something coming from another subselect + for(k = 0; k<subselects; k++) { + if(getArg(oldInstrPtr[instructionNum], 2) == projectInputs[k].first) { + InstrPtr joinInstrPtr = newStmt(mb, "algebra", "subjoin"); + InstrPtr extraProjectInstrPtr = newStmt(mb, "algebra", "leftfetchjoin"); //to get the oids from the original BAT that correspond to the oids tha satisfy the join + int joinReturnId1 = newTmpVariable(mb, newBatType(TYPE_oid, TYPE_oid)); + int joinReturnId2 = newTmpVariable(mb, newBatType(TYPE_oid, TYPE_oid)); + projectReturnId = newTmpVariable(mb, newBatType(TYPE_oid, TYPE_oid)); + + + setReturnArgument(joinInstrPtr, joinReturnId1); + joinInstrPtr = pushArgument(mb, joinInstrPtr, joinReturnId2); + joinInstrPtr = pushArgument(mb, joinInstrPtr, projectReturnId); //the first BAT is the result of the projection of this subselect + joinInstrPtr = pushArgument(mb, joinInstrPtr, projectInputs[k].first); //the second BAT is the results of the projection of the other subselect + joinInstrPtr = pushNil(mb, joinInstrPtr, TYPE_bat); + joinInstrPtr = pushNil(mb, joinInstrPtr, TYPE_bat); + joinInstrPtr = pushBit(mb, joinInstrPtr, 0); //do not match null values + joinInstrPtr = pushNil(mb, joinInstrPtr, TYPE_lng); //I do not have an estimation of the size + joinInstrPtr->retc=2; + + 
setReturnArgument(extraProjectInstrPtr, projectInputs[subselects].first); + extraProjectInstrPtr = pushArgument(mb, extraProjectInstrPtr, joinReturnId1); //the result of the join + extraProjectInstrPtr = pushArgument(mb, extraProjectInstrPtr, projectReturnId); //the BAT used for the join computation + + } + } + delArgument(oldInstrPtr[instructionNum], 2); + + } + + + //add a new function that gets the oids of the original BAT that qualified the spatial function + setReturnArgument(projectInstrPtr, projectReturnId); + projectInstrPtr = pushArgument(mb, projectInstrPtr, subselectReturnId); + projectInstrPtr = pushArgument(mb, projectInstrPtr, filterReturnId); + + subselects++; +} + +static int getSubselectSecondInput(int currentSubselectFirstInput) { + int k=0; + + for(k=0; k<spatials ; k++) + if(currentSubselectFirstInput == subselectInputs[k].first) + return subselectInputs[k].second; + return -1; +} int OPTgeospatialImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) { int i=0, actions = 0; int nextFreeSlot = mb->stop; //up to this there are instructions InstrPtr *oldInstrPtr = mb->stmt; //pointer to the first instruction int slimit = mb->ssize; //what is - + + spatials = 0; + subselects = 0; + (void) pci; (void) stk; (void) cntxt; @@ -70,17 +164,22 @@ int OPTgeospatialImplementation(Client c //iterate over the instructions - for(i=0; i<nextFreeSlot; i++) { + for(i=0; i<nextFreeSlot; i++) { + //chech the module and function name + if(getModuleId(oldInstrPtr[i]) && !idcmp(getModuleId(oldInstrPtr[i]),"algebra") && !idcmp(getFunctionId(oldInstrPtr[i]), "subselect")) { + int filterReturnId = 0; + pushInstruction(mb, oldInstrPtr[i]); - //chech the module and function name - if(getModuleId(oldInstrPtr[i]) && !strcasecmp(getModuleId(oldInstrPtr[i]),"batgeom")) { + if((filterReturnId = getSubselectSecondInput(getArg(oldInstrPtr[i], 1))) > 0) + fixSubselect(mb, oldInstrPtr, i, filterReturnId); + } else if(getModuleId(oldInstrPtr[i]) && 
!strcasecmp(getModuleId(oldInstrPtr[i]),"batgeom")) { if((strcasecmp(getFunctionId(oldInstrPtr[i]), "contains1") == 0) || (strcasecmp(getFunctionId(oldInstrPtr[i]), "contains2") == 0)) { if(oldInstrPtr[i]->argc == 5) { //call all necessary intructions for the filter and the evaluation of the spatial relation createFilterInstruction(mb, oldInstrPtr, i, getArg(oldInstrPtr[i],1)); - //skip the algebra.subselect command - i++; + ////skip the algebra.subselect command + //i++; actions += 5; } else { @@ -140,8 +239,8 @@ int OPTgeospatialImplementation(Client c //call all necessary intructions for the filter and the evaluation of the spatial relation createFilterInstruction(mb, oldInstrPtr, i, bufferReturnId); - //skip the algebra.thetasubselect command - i++; + ////skip the algebra.thetasubselect command + //i++; actions += 5; } else { @@ -189,10 +288,20 @@ int OPTgeospatialImplementation(Client c actions+=3; } - } else //put back all other instructions from batgeom + } else {//put back all other instructions from batgeom pushInstruction(mb, oldInstrPtr[i]); - } else //put all other instructions back + } + } else { //put all other instructions back pushInstruction(mb, oldInstrPtr[i]); + + //I need to track the first input to the subselect + for(j=0; j<spatials ; j++) { + if(getArg(oldInstrPtr[i],1) == subselectInputs[j].first) { + subselectInputs[j].first = getArg(oldInstrPtr[i], 0); + break; + } + } + } } GDKfree(oldInstrPtr); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list