Changeset: 4ef5e5da0ac0 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4ef5e5da0ac0 Modified Files: monetdb5/extras/rdf/rdfschema.c monetdb5/extras/rdf/rdfschema.h Branch: rdf Log Message:
Add nil values for each missing property of a CS (compared to its maxCS) diffs (125 lines): diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -3219,7 +3219,21 @@ void updateTblIdxPropIdxMap(int* tblIdxP } -str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, PropStat* propStat, CStableStat *cstablestat){ +static +void fillMissingvalues(BAT* curBat, oid lastSubjId){ + oid k; + BUN bun; + //Insert nil values to the last column if it does not have the same + //size as the table + if (curBat != NULL){ + for(k = BATcount(curBat) -1; k < lastSubjId; k++){ + bun = oid_nil; + BUNappend(curBat,&bun , TRUE); + } + } +} + +str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, PropStat* propStat, CStableStat *cstablestat, oid* lastSubjId){ BAT *sbat = NULL, *pbat = NULL, *obat = NULL; BATiter si,pi,oi; BUN p,q; @@ -3233,9 +3247,11 @@ str RDFdistTriplesToCSs(int *ret, bat *s // list of that table's properties Postinglist tmpPtl; int tmpColIdx = -1; + int lasttblIdx = -1; + int lastColIdx = -1; BUN bun; int i,j; - BAT *curBat; + BAT *curBat = NULL; oid tmplastInsertedS; oid k; @@ -3277,6 +3293,8 @@ str RDFdistTriplesToCSs(int *ret, bat *s if (*pbt != lastP){ + fillMissingvalues(curBat, lastSubjId[tblIdx]); + //Get number of BATs for this p ppos = BUNfnd(BATmirror(propStat->pBat),pbt); if (ppos == BUN_NONE) @@ -3308,8 +3326,11 @@ str RDFdistTriplesToCSs(int *ret, bat *s tmpColIdx = tmpTblIdxPropIdxMap[tblIdx]; - printf(BUNFMT": Table %d | column %d for prop " BUNFMT " | sub " BUNFMT " | obj " BUNFMT "\n",p, tblIdx, - tmpColIdx, *pbt, tmpSoid, *obt); + if (tmpColIdx != lastColIdx || lasttblIdx != tblIdx){ + fillMissingvalues(curBat, lastSubjId[lasttblIdx]); + lastColIdx = tmpColIdx; + lasttblIdx = tblIdx; + } curBat = cstablestat->lstcstable[tblIdx].colBats[tmpColIdx]; tmplastInsertedS = cstablestat->lastInsertedS[tblIdx][tmpColIdx]; @@ 
-3317,18 +3338,23 @@ str RDFdistTriplesToCSs(int *ret, bat *s //TODO: Check last subjectId for this prop. If the subjectId is not continuous, insert NIL if (tmpSoid > (tmplastInsertedS + 1)){ for (k = tmplastInsertedS; k < tmpSoid-1; k++){ - bun = BUN_NONE; + printf(" Add nil value \n"); + bun = oid_nil; BUNappend(curBat,&bun , TRUE); } } BUNappend(curBat, obt, TRUE); + printf(BUNFMT": Table %d | column %d for prop " BUNFMT " | sub " BUNFMT " | obj " BUNFMT "\n",p, tblIdx, + tmpColIdx, *pbt, tmpSoid, *obt); //Update last inserted S cstablestat->lastInsertedS[tblIdx][tmpColIdx] = tmpSoid; } - //Keep the batCacheId + fillMissingvalues(curBat, lastSubjId[tblIdx]); + + // Keep the batCacheId for (i = 0; i < cstablestat->numTables; i++){ for (j = 0; j < cstablestat->numPropPerTable[i];j++){ cstablestat->lstbatid[i][j] = cstablestat->lstcstable[i].colBats[j]->batCacheid; @@ -3454,7 +3480,7 @@ RDFreorganize(int *ret, CStableStat *cst lmap = BUNappend(lmap, &l, TRUE); rmap = BUNappend(rmap, &r, TRUE); - lastSubjId[tblIdx] = newId; + lastSubjId[tblIdx]++; } } @@ -3509,13 +3535,14 @@ RDFreorganize(int *ret, CStableStat *cst printPropStat(propStat); - if (RDFdistTriplesToCSs(ret, &sNewBat->batCacheid, &pNewBat->batCacheid, &oNewBat->batCacheid, propStat, cstablestat) != MAL_SUCCEED){ + if (RDFdistTriplesToCSs(ret, &sNewBat->batCacheid, &pNewBat->batCacheid, &oNewBat->batCacheid, propStat, cstablestat,lastSubjId) != MAL_SUCCEED){ throw(RDF, "rdf.RDFreorganize", "Problem in distributing triples to BATs using CSs"); } freeCSset(freqCSset); free(subjCSMap); free(csTblIdxMapping); + free(lastSubjId); //freeCStableStat(cstablestat); BBPreclaim(lmap); diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h --- a/monetdb5/extras/rdf/rdfschema.h +++ b/monetdb5/extras/rdf/rdfschema.h @@ -195,7 +195,7 @@ typedef struct CStableStat { rdf_export str -RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, PropStat* propStat, CStableStat *cstablestat); 
+RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, PropStat* propStat, CStableStat *cstablestat, oid* lastSubjId); rdf_export str RDFreorganize(int *ret, CStableStat *cstablestat, bat *sbatid, bat *pbatid, bat *obatid, bat *mapbatid, int *freqThreshold); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list