Changeset: 4ef5e5da0ac0 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4ef5e5da0ac0
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Insert nil values for each missing property of a CS (compared with its maxCS)


diffs (125 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -3219,7 +3219,21 @@ void updateTblIdxPropIdxMap(int* tblIdxP
 
 }
 
-str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, 
PropStat* propStat, CStableStat *cstablestat){
+static 
+void fillMissingvalues(BAT* curBat, oid lastSubjId){
+       oid k; 
+       BUN bun; 
+       //Insert nil values to the last column if it does not have the same
+       //size as the table
+       if (curBat != NULL){
+               for(k = BATcount(curBat) -1; k < lastSubjId; k++){
+                       bun = oid_nil; 
+                       BUNappend(curBat,&bun , TRUE);
+               }
+       }
+}
+
+str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, 
PropStat* propStat, CStableStat *cstablestat, oid* lastSubjId){
        BAT *sbat = NULL, *pbat = NULL, *obat = NULL; 
        BATiter si,pi,oi; 
        BUN p,q; 
@@ -3233,9 +3247,11 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                                        // list of that table's properties
        Postinglist tmpPtl; 
        int     tmpColIdx = -1; 
+       int     lasttblIdx = -1; 
+       int     lastColIdx = -1; 
        BUN     bun; 
        int     i,j; 
-       BAT     *curBat;
+       BAT     *curBat = NULL;
        oid     tmplastInsertedS; 
        oid     k; 
 
@@ -3277,6 +3293,8 @@ str RDFdistTriplesToCSs(int *ret, bat *s
 
 
                if (*pbt != lastP){
+                       fillMissingvalues(curBat, lastSubjId[tblIdx]); 
+                       
                        //Get number of BATs for this p
                        ppos = BUNfnd(BATmirror(propStat->pBat),pbt);
                        if (ppos == BUN_NONE)
@@ -3308,8 +3326,11 @@ str RDFdistTriplesToCSs(int *ret, bat *s
 
                tmpColIdx = tmpTblIdxPropIdxMap[tblIdx]; 
 
-               printf(BUNFMT": Table %d | column %d  for prop " BUNFMT " | sub 
" BUNFMT " | obj " BUNFMT "\n",p, tblIdx, 
-                                                       tmpColIdx, *pbt, 
tmpSoid, *obt); 
+               if (tmpColIdx != lastColIdx || lasttblIdx != tblIdx){
+                       fillMissingvalues(curBat, lastSubjId[lasttblIdx]);
+                       lastColIdx = tmpColIdx; 
+                       lasttblIdx = tblIdx; 
+               }
 
                curBat = cstablestat->lstcstable[tblIdx].colBats[tmpColIdx];
                tmplastInsertedS = 
cstablestat->lastInsertedS[tblIdx][tmpColIdx];
@@ -3317,18 +3338,23 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                //TODO: Check last subjectId for this prop. If the subjectId is 
not continuous, insert NIL
                if (tmpSoid > (tmplastInsertedS + 1)){  
                        for (k = tmplastInsertedS; k < tmpSoid-1; k++){
-                               bun = BUN_NONE; 
+                               printf("        Add nil value \n");
+                               bun = oid_nil; 
                                BUNappend(curBat,&bun , TRUE);
                        }
                }
 
                BUNappend(curBat, obt, TRUE); 
 
+               printf(BUNFMT": Table %d | column %d  for prop " BUNFMT " | sub 
" BUNFMT " | obj " BUNFMT "\n",p, tblIdx, 
+                                                       tmpColIdx, *pbt, 
tmpSoid, *obt); 
                //Update last inserted S
                cstablestat->lastInsertedS[tblIdx][tmpColIdx] = tmpSoid;
        }
 
-       //Keep the batCacheId
+       fillMissingvalues(curBat, lastSubjId[tblIdx]); 
+
+       // Keep the batCacheId
        for (i = 0; i < cstablestat->numTables; i++){
                for (j = 0; j < cstablestat->numPropPerTable[i];j++){
                        cstablestat->lstbatid[i][j] = 
cstablestat->lstcstable[i].colBats[j]->batCacheid; 
@@ -3454,7 +3480,7 @@ RDFreorganize(int *ret, CStableStat *cst
                                lmap = BUNappend(lmap, &l, TRUE);
                                rmap = BUNappend(rmap, &r, TRUE);
 
-                               lastSubjId[tblIdx] = newId;
+                               lastSubjId[tblIdx]++;
                        }
 
                }
@@ -3509,13 +3535,14 @@ RDFreorganize(int *ret, CStableStat *cst
        
        printPropStat(propStat); 
 
-       if (RDFdistTriplesToCSs(ret, &sNewBat->batCacheid, 
&pNewBat->batCacheid, &oNewBat->batCacheid, propStat, cstablestat) != 
MAL_SUCCEED){
+       if (RDFdistTriplesToCSs(ret, &sNewBat->batCacheid, 
&pNewBat->batCacheid, &oNewBat->batCacheid, propStat, cstablestat,lastSubjId) 
!= MAL_SUCCEED){
                throw(RDF, "rdf.RDFreorganize", "Problem in distributing 
triples to BATs using CSs");           
        }
                
        freeCSset(freqCSset); 
        free(subjCSMap); 
        free(csTblIdxMapping);
+       free(lastSubjId);
        //freeCStableStat(cstablestat); 
 
        BBPreclaim(lmap);
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -195,7 +195,7 @@ typedef struct CStableStat {
 
 
 rdf_export str
-RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, PropStat* 
propStat, CStableStat *cstablestat);
+RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, PropStat* 
propStat, CStableStat *cstablestat, oid* lastSubjId);
 
 rdf_export str
 RDFreorganize(int *ret, CStableStat *cstablestat, bat *sbatid, bat *pbatid, 
bat *obatid, bat *mapbatid, int *freqThreshold);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to