Changeset: 0f6ada9dccb7 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0f6ada9dccb7 Modified Files: monetdb5/extras/rdf/rdfschema.c monetdb5/extras/rdf/rdfschema.h Branch: rdf Log Message:
Create function for distribute each triples from PSO table to CS tables diffs (205 lines): diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -1903,20 +1903,22 @@ void getPropStatisticsFromMaxCSs(PropSta static -PropStat* getPropStatisticsFromFreqCSs(CSset* freqCSset){ - - int i, j; +PropStat* getPropStatisticsFromFreqCSs(CSset* freqCSset, int *numdistinctMCS){ + + int i, j, k; CS cs; PropStat* propStat; propStat = initPropStat(); + k = 0; + for (i = 0; i < freqCSset->numCSadded; i++){ if (freqCSset->items[i].parentFreqIdx == -1){ // Only use the maximum or merge CS cs = (CS)freqCSset->items[i]; - + k++; for (j = 0; j < cs.numProp; j++){ addaProp(propStat, cs.lstProp[j], i); } @@ -1930,6 +1932,8 @@ PropStat* getPropStatisticsFromFreqCSs(C } */ + *numdistinctMCS = k; + return propStat; } @@ -2987,14 +2991,15 @@ BAT* getOriginalOBat(BAT *obat){ return origobat; } -/* + static -oid getTblidFromSoid(oid Soid){ +int getTblidFromSoid(oid Soid){ int freqCSid; + freqCSid = (int) ((Soid >> (sizeof(BUN)*8 - NBITS_FOR_CSID)) & ((1 << (NBITS_FOR_CSID-1)) - 1)) ; + return freqCSid; } -*/ static str triplesubsort(BAT **sbat, BAT **pbat, BAT **obat){ @@ -3032,6 +3037,97 @@ str triplesubsort(BAT **sbat, BAT **pbat return MAL_SUCCEED; } +static +CStable* initCStables(PropStat* propStat, int num){ + CStable* cstable; + int i; + + cstable = (CStable *) malloc (sizeof (CStable)); + cstable->lstbatid = (bat**) malloc(sizeof (bat*) * (propStat->numAdded)); + for(i = 0; i < propStat->numAdded;i++){ + cstable->lstbatid[i] = (bat*)malloc(sizeof(bat) * propStat->plCSidx[i].numAdded); + } + cstable->numTables = num; + return cstable; +} + +str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, PropStat* propStat, int numdistinctMCS){ + BAT *sbat = NULL, *pbat = NULL, *obat = NULL; + BATiter si,pi,oi; + BUN p,q; + oid *pbt, *sbt, *obt; + oid lastP, lastS; + CStable *cstable; + int freqid; + BUN ppos; + + BAT** setofBats = NULL; + int* csIdBatidxmap; //small map for each p + + if ((sbat = BATdescriptor(*sbatid)) == NULL) { + throw(MAL, "rdf.RDFdistTriplesToCSs", RUNTIME_OBJECT_MISSING); + } + if ((pbat = BATdescriptor(*pbatid)) == NULL) { + BBPreleaseref(sbat->batCacheid); + throw(MAL, "rdf.RDFdistTriplesToCSs", RUNTIME_OBJECT_MISSING); + } + if ((obat = BATdescriptor(*obatid)) == NULL) { + BBPreleaseref(sbat->batCacheid); + BBPreleaseref(pbat->batCacheid); + throw(MAL, "rdf.RDFdistTriplesToCSs", RUNTIME_OBJECT_MISSING); + } + + si = bat_iterator(sbat); + pi = bat_iterator(pbat); + oi = bat_iterator(obat); + + lastP = BUN_NONE; + lastS = BUN_NONE; + //Init cstable + cstable = initCStables(propStat, numdistinctMCS); + printf("Created cstable with %d tables \n", cstable->numTables); + + BATloop(pbat, p, q){ + pbt = (oid *) BUNtloc(pi, p); + sbt = (oid *) BUNtloc(si, p); + obt = (oid *) BUNtloc(oi, p); + if (*pbt != lastP){ + if (csIdBatidxmap == NULL){ + csIdBatidxmap = (int *) malloc(sizeof(int) * 1); + } + + //Get number of BATs for this p + ppos = BUNfnd(BATmirror(propStat->pBat),pbt); + if (ppos == BUN_NONE) + throw(RDF, "rdf.RDFdistTriplesToCSs", "This prop must be in propStat bat"); + + if (setofBats != NULL) + free(setofBats); + + //init set of BATs containing this property + setofBats = (BAT**) malloc(sizeof(BAT*) * propStat->plCSidx[ppos].numAdded); + + + freqid = getTblidFromSoid(*sbt); + printf("Table for prop " BUNFMT " | obj " BUNFMT "is %d \n",*pbt, *obt, freqid); + + lastP = *pbt; + } + else if (*sbt != lastS){ + lastS = *sbt; + } + + + } + + *ret = 1; + + BBPunfix(sbat->batCacheid); + BBPunfix(pbat->batCacheid); + BBPunfix(obat->batCacheid); + + return MAL_SUCCEED; +} str RDFreorganize(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat *mapbatid, int *freqThreshold){ @@ -3053,6 +3149,7 @@ RDFreorganize(int *ret, bat *sbatid, bat bat oNewBatid, pNewBatid; oid *csMFreqCSMap; /* Store the mapping from a CS id to an index of a maxCS or mergeCS in freqCSset. */ PropStat *propStat; + int numdistinctMCS = 0; freqCSset = initCSset(); @@ -3183,8 +3280,12 @@ RDFreorganize(int *ret, bat *sbatid, bat BATprint(sNewBat); - propStat = getPropStatisticsFromFreqCSs(freqCSset); + propStat = getPropStatisticsFromFreqCSs(freqCSset, &numdistinctMCS); printPropStat(propStat); + + if (RDFdistTriplesToCSs(ret, &sNewBat->batCacheid, &pNewBat->batCacheid, &oNewBat->batCacheid, propStat, numdistinctMCS) != MAL_SUCCEED){ + throw(RDF, "rdf.RDFreorganize", "Problem in distributing triples to BATs using CSs"); + } freeCSset(freqCSset); free(subjCSMap); diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h --- a/monetdb5/extras/rdf/rdfschema.h +++ b/monetdb5/extras/rdf/rdfschema.h @@ -35,6 +35,8 @@ RDFextractCSwithTypes(int *ret, bat *sba rdf_export str RDFreorganize(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat *mapbatid, int *freqThreshold); + + typedef enum { NORMALCS, FREQCS, @@ -171,4 +173,14 @@ typedef struct CSmergeRel{ int numAllocation; } CSmergeRel; +typedef struct CStable { + bat** lstbatid; + int numTables; + oid* lastInsertedS; +} CStable; + + +rdf_export str +RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, PropStat* propStat, int numTables); + #endif /* _RDFSCHEMA_H_ */ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list