Changeset: c85a5f2c0824 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=c85a5f2c0824
Modified Files:
	monetdb5/extras/rdf/rdfschema.c
	monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Update frequency and coverage of each column after moving each triple to PSO table diffs (146 lines): diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -5946,7 +5946,9 @@ str initFullSampleData(CSSampleExtend *c colIdx++; csSampleEx[i].lstProp[colIdx] = csPropTypes[i].lstPropTypes[j].prop; csSampleEx[i].lstPropSupport[colIdx] = csPropTypes[i].lstPropTypes[j].propFreq; - + + if (csPropTypes[i].lstPropTypes[j].propFreq == 0) printf("[Verify] Empty Bat at table %d col %d Prop "BUNFMT "\n",i,colIdx,csPropTypes[i].lstPropTypes[j].prop); + //Mark whther this col is a MV col csSampleEx[i].lstIsMVCol[colIdx] = csPropTypes[i].lstPropTypes[j].isMVProp; @@ -8888,7 +8890,34 @@ void getRealValue(ValPtr returnValue, oi } } - +static +void updatePropTypeForRemovedTriple(CSPropTypes *csPropTypes, int* tmpTblIdxPropIdxMap, int tblIdx, oid *subjCSMap, int* csTblIdxMapping, oid sbt, oid pbt, oid *lastRemovedProp, oid* lastRemovedSubj, char isMultiToSingleProp){ + int tmptblIdx, tmpPropIdx; + + if (tblIdx == -1) + tmptblIdx = csTblIdxMapping[subjCSMap[sbt]]; + else + tmptblIdx = tblIdx; + + tmpPropIdx = tmpTblIdxPropIdxMap[tmptblIdx]; + //if (tmptblIdx == 3 && tmpPropIdx == 51) printf("Removing <p> <s> : " BUNFMT " | " BUNFMT "\n",pbt,sbt); + //Update PropTypes + if (isMultiToSingleProp){ + csPropTypes[tmptblIdx].lstPropTypes[tmpPropIdx].propCover--; + return; + } + + if (pbt != *lastRemovedProp || sbt != *lastRemovedSubj){ + csPropTypes[tmptblIdx].lstPropTypes[tmpPropIdx].propCover--; + csPropTypes[tmptblIdx].lstPropTypes[tmpPropIdx].propFreq--; + + *lastRemovedProp = pbt; + *lastRemovedSubj = sbt; + } + else{ //Multivalue + csPropTypes[tmptblIdx].lstPropTypes[tmpPropIdx].propCover--; + } +} //Macro for inserting to PSO #define insToPSO(pb, sb, ob, pbt, sbt, obt) \ @@ -8905,7 +8934,7 @@ void getRealValue(ValPtr returnValue, oi }while (0) -str RDFdistTriplesToCSs(int *ret, bat 
*sbatid, bat *pbatid, bat *obatid, bat *mbatid, bat *lmapbatid, bat *rmapbatid, PropStat* propStat, CStableStat *cstablestat, CSPropTypes *csPropTypes, oid* lastSubjId, char *isLotsNullSubj){ +str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat *mbatid, bat *lmapbatid, bat *rmapbatid, PropStat* propStat, CStableStat *cstablestat, CSPropTypes *csPropTypes, oid* lastSubjId, char *isLotsNullSubj, oid *subjCSMap, int* csTblIdxMapping){ BAT *sbat = NULL, *pbat = NULL, *obat = NULL, *mbat = NULL, *lmap = NULL, *rmap = NULL; BATiter si,pi,oi, mi; @@ -8961,7 +8990,10 @@ str RDFdistTriplesToCSs(int *ret, bat *s int initHashBatgz = 0; BUN tmpFKRefBun = BUN_NONE; char isFKCol = 0; - #endif + #endif + + oid lastRemovedSubj = BUN_NONE; + oid lastRemovedProp = BUN_NONE; (void) isLotsNullSubj; @@ -9092,6 +9124,9 @@ str RDFdistTriplesToCSs(int *ret, bat *s if (tblIdx == -1 && isLotsNullSubj[*sbt]){ // A lots-of-null subject insToPSO(cstablestat->pbat,cstablestat->sbat, cstablestat->obat, pbt, sbt, obt); + + //Update propTypes + updatePropTypeForRemovedTriple(csPropTypes, tmpTblIdxPropIdxMap, tblIdx, subjCSMap, csTblIdxMapping, *sbt, *pbt, &lastRemovedProp, &lastRemovedSubj,0); continue; } @@ -9112,6 +9147,10 @@ str RDFdistTriplesToCSs(int *ret, bat *s if (objType != URI){ //Must be a dirty one --> put to pso //printf("Dirty FK at tbl %d | propId " BUNFMT " \n", tblIdx, *pbt); insToPSO(cstablestat->pbat,cstablestat->sbat, cstablestat->obat, pbt, sbt, obt); + + //Update propTypes + updatePropTypeForRemovedTriple(csPropTypes, tmpTblIdxPropIdxMap,tblIdx, subjCSMap, csTblIdxMapping, *sbt, *pbt, &lastRemovedProp, &lastRemovedSubj,0); + continue; } else{ // @@ -9119,6 +9158,10 @@ str RDFdistTriplesToCSs(int *ret, bat *s if (tmpOidTblIdx != csPropTypes[tblIdx].lstPropTypes[tmpPropIdx].refTblId){ //printf("Dirty FK at tbl %d | propId " BUNFMT " \n", tblIdx, *pbt); insToPSO(cstablestat->pbat,cstablestat->sbat, cstablestat->obat, pbt, sbt, obt); + + //Update 
propTypes + updatePropTypeForRemovedTriple(csPropTypes, tmpTblIdxPropIdxMap,tblIdx, subjCSMap, csTblIdxMapping, *sbt, *pbt, &lastRemovedProp, &lastRemovedSubj,0); + continue; } } @@ -9391,6 +9434,10 @@ str RDFdistTriplesToCSs(int *ret, bat *s else{ // This is an extra object value insToPSO(cstablestat->pbat,cstablestat->sbat, cstablestat->obat, pbt, sbt, obt); //printf(" Extra object value ==> To PSO \n"); + + //Update propTypes + updatePropTypeForRemovedTriple(csPropTypes, tmpTblIdxPropIdxMap, tblIdx,subjCSMap, csTblIdxMapping, *sbt, *pbt, &lastRemovedProp, &lastRemovedSubj,1); + continue; } } @@ -9402,6 +9449,10 @@ str RDFdistTriplesToCSs(int *ret, bat *s if (tmpTableType == PSOTBL){ //For infrequent type ---> go to PSO insToPSO(cstablestat->pbat,cstablestat->sbat, cstablestat->obat, pbt, sbt, obt); //printf(" ==> To PSO \n"); + + //Update propTypes + updatePropTypeForRemovedTriple(csPropTypes, tmpTblIdxPropIdxMap, tblIdx,subjCSMap, csTblIdxMapping, *sbt, *pbt, &lastRemovedProp, &lastRemovedSubj,0); + continue; } @@ -9841,7 +9892,7 @@ RDFreorganize(int *ret, CStableStat *cst printf (" Prepare and create sub-sorted PSO took %f seconds.\n", ((float)(curT - tmpLastT))/CLOCKS_PER_SEC); tmpLastT = curT; returnStr = RDFdistTriplesToCSs(ret, &sNewBat->batCacheid, &pNewBat->batCacheid, &oNewBat->batCacheid, mapbatid, - &lmap->batCacheid, &rmap->batCacheid, propStat, cstablestat, csPropTypes, lastSubjId, isLotsNullSubj); + &lmap->batCacheid, &rmap->batCacheid, propStat, cstablestat, csPropTypes, lastSubjId, isLotsNullSubj, subjCSMap, csTblIdxMapping); printf("Return value from RDFdistTriplesToCSs is %s \n", returnStr); if (returnStr != MAL_SUCCEED){ throw(RDF, "rdf.RDFreorganize", "Problem in distributing triples to BATs using CSs"); diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h --- a/monetdb5/extras/rdf/rdfschema.h +++ b/monetdb5/extras/rdf/rdfschema.h @@ -449,7 +449,7 @@ typedef struct CSSampleExtend{ } CSSampleExtend; rdf_export str 
-RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid,bat *mbatid, bat *lmapbatid, bat *rmapbatid, PropStat* propStat, CStableStat *cstablestat, CSPropTypes *csPropTypes, oid* lastSubjId, char *isLotsNullSubj); +RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid,bat *mbatid, bat *lmapbatid, bat *rmapbatid, PropStat* propStat, CStableStat *cstablestat, CSPropTypes *csPropTypes, oid* lastSubjId, char *isLotsNullSubj, oid *subjCSMap, int* csTblIdxMapping); rdf_export str RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat *mapbatid, bat *ontbatid, int *freqThreshold, void *freqCSset, oid **subjCSMap, oid *maxCSoid, int *maxNumPwithDup, CSlabel** labels, CSrel **csRelBetweenMergeFreqSet); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list